from RE_init import *
import re
import string
from collections import Counter

def read_data(file_input, dataset="[DEFAULT_DATASET]", delim=",", LOAD_ANNOTATIONS=False):
    """
    Read a delimited text file into a pandas DataFrame.

    The path is printed first. What happens next depends on ``dataset``:

    * "[DATASET_1]": 'tweet_posted_time' is parsed into datetime objects
      (everything after the first '.' is discarded), only the columns of
      interest that exist in the file are kept, rows containing any missing
      value are dropped and the number of remaining rows is printed.
    * "[DATASET_2]": malformed lines are skipped and the column
      '[SOURCE_TEXT_COLUMN]' is renamed to 'text'.
    * any other value: malformed lines are skipped and quote handling is
      disabled (``quoting=3``).

    Args:
        file_input (str): Path to input file
        dataset (str): Dataset name
        delim (str): Delimiter for the file
        LOAD_ANNOTATIONS (bool): Not used by this function; kept so existing
            callers keep working

    Returns:
        pandas.DataFrame: DataFrame containing the data
    """
    # Single-argument print(...) behaves the same as the print statement.
    print(file_input)

    if dataset == "[DATASET_1]":
        # pandas opens and closes the file itself; no separate handle is needed.
        df = pd.read_csv(file_input, delimiter=delim, header=0)

        # Timestamps look like 2016-01-02T03:04:05.000Z; keep the part before '.'.
        df['tweet_posted_time'] = df['tweet_posted_time'].apply(
            lambda x: datetime.strptime(x.split('.')[0], '%Y-%m-%dT%H:%M:%S')
        )

        selected_columns = [
            'tweet_posted_time', 'tweet_text', 'main_tweet', 'ollie_conf',
            'ollie_arg1', 'ollie_rel', 'ollie_arg2', 'clean_tweet_polarity',
            'clean_tweet_subjectivity'
        ]

        # Keep the file's own column order and silently ignore absent columns.
        df_selected = df[[i for i in df.columns if i in selected_columns]]
        df_selected = df_selected.dropna(how='any')

        print("Number of instances: ")
        print(len(df_selected.index))

        return df_selected

    if dataset == "[DATASET_2]":
        df = pd.read_csv(file_input, delimiter=delim, header=0, error_bad_lines=False)
        df.rename(columns={'[SOURCE_TEXT_COLUMN]': 'text'}, inplace=True)
        return df

    # Default case for other datasets; quoting=3 ignores field quoting.
    return pd.read_csv(file_input, delimiter=delim, header=0, error_bad_lines=False, quoting=3)


def get_file_input(DATA_SET):
    """
    Get the input file path for a given dataset.

    The path is ``data_dir`` followed by the dataset's sub-directory and file
    name. The file itself is not opened or loaded: the previous
    implementation read the whole dataset only to discard it, and for
    "[DATASET_1]" that load failed because it looked up a 'text' column the
    loader does not keep.

    Args:
        DATA_SET (str): Dataset name

    Returns:
        str: Path to the input file

    Raises:
        ValueError: If ``DATA_SET`` is not one of the known datasets
            (previously this surfaced as an unbound-variable error).
    """
    # (sub-directory, file name) below data_dir for every known dataset.
    locations = {
        "[DATASET_1]": ('[DATA_SUBDIR_1]/', '[INPUT_FILENAME_1]'),
        "[DATASET_2]": ('[DATA_SUBDIR_1]/', '[INPUT_FILENAME_2]'),
        "[DATASET_3]": ('[DATA_SUBDIR_2]/', '[INPUT_FILENAME_3]'),
    }

    if DATA_SET not in locations:
        raise ValueError("Unknown dataset: %r" % (DATA_SET,))

    sub_dir, file_input_name = locations[DATA_SET]
    return data_dir + sub_dir + file_input_name
    

def save_pairwise_rels(file_loc, g, print_option=True):
    """
    Save pairwise relations from a graph to a file.

    Every ordered pair of distinct nodes is looked up with
    ``g.get_edge_data``; pairs whose edge data is truthy are written as one
    tab-separated line ``<node1>\\t<node2>\\t<edge data>``.

    Args:
        file_loc (str): Path to output file
        g (networkx.Graph): Graph containing relations
        print_option (bool): Whether to also print each relation to stdout

    Returns:
        None
    """
    nodes = list(g.nodes())

    # The context manager closes the file even if a lookup or write fails.
    with open(file_loc, 'w') as out:
        for n1 in nodes:
            for n2 in nodes:
                # Compare by value: identity is not a reliable node test.
                if n1 == n2:
                    continue
                edge_data = g.get_edge_data(n1, n2)
                if not edge_data:
                    continue
                out.write(str(n1) + "\t" + str(n2) + "\t" + str(edge_data) + "\n")
                if print_option:
                    # Space-separated, same layout as `print n1, n2, l`.
                    print("%s %s %s" % (n1, n2, edge_data))


def plot_argument_graph(g, path_to_file=None):
    """
    Plot an argument graph.

    The graph is converted to a Graphviz AGraph and laid out with 'dot';
    ``draw(g, show='ipynb')`` is called for the notebook rendering. When
    ``path_to_file`` is given, the laid-out graph is also drawn to that path.

    Args:
        g (networkx.Graph): Graph to plot
        path_to_file (str): Path to save the plot; nothing is saved when it
            is None or empty

    Returns:
        None
    """
    A = nx.nx_agraph.to_agraph(g)
    A.layout('dot', args='-Nfontsize=10 -Nwidth=".2" -Nheight=".2" -Nmargin=0 -Gfontsize=8')
    draw(g, show='ipynb')

    if path_to_file:
        # draw() is handed the path and writes the file itself; opening a
        # second, unused write handle on the same path was redundant.
        A.draw(path_to_file)


def plot_dep(g, title):
    """
    Render a dependency graph.

    The graph is converted to an AGraph, laid out with 'dot', drawn to the
    fixed temporary image path, and the result of ``draw(g, show='ipynb')``
    is passed to ``display``.

    Args:
        g (networkx.Graph): Dependency graph to plot
        title (str): Title for the plot (not used by the body)

    Returns:
        None
    """
    layout_args = '-Nfontsize=10 -Nwidth=".2" -Nheight=".2" -Nmargin=0 -Gfontsize=8'

    agraph = nx.nx_agraph.to_agraph(g)
    agraph.layout('dot', args=layout_args)
    agraph.draw('[TEMP_IMAGE_PATH]')

    display(draw(g, show='ipynb'))
    

def print_relations(rels):
    """
    Print relations in a readable format.
    
    Args:
        rels (list): List of relations to print
        
    Returns:
        None
    """
    if len(rels) < 1:
        print "No extraction."
        return
        
    for ind, r in enumerate(rels):
        print ">Extraction Number: ", ind+1, " - ", "Pattern: ", r["type"], " - relation : (", r["arg1"], ", ", r["rel"], ", ", r["arg2"], ")"
        if "arg1_prepositions" in r and "rel_prepositions" in r and "arg2_prepositions" in r:
            if r["arg1_prepositions"]:
                print " arg1_prep: ", r["arg1_prepositions"],
            if r["rel_prepositions"]:
                print " rel_prep: ", r["rel_prepositions"],
            if r["arg2_prepositions"]:
                print " arg2_prep: ", r["arg2_prepositions"]   

        print "\n\n"


def get_rels_str(rels):
    """
    Format relations as "( arg1, rel, arg2 )" strings.

    Args:
        rels (list): Relation dicts with string values under "arg1", "rel"
            and "arg2"

    Returns:
        list: One formatted string per relation, in input order; an empty
            list when there are no relations
    """
    if len(rels) < 1:
        return []

    return [
        "( " + ", ".join((rel["arg1"], rel["rel"], rel["arg2"])) + " )"
        for rel in rels
    ]
    

def saveToFile_rows(outputLoc, inputList, delim):
    """
    Write rows to a delimited file through the csv module.

    Args:
        outputLoc (str): Path to output file (opened in "wb" mode)
        inputList (list): Rows to write, each an iterable of fields
        delim (str): Field delimiter

    Returns:
        None
    """
    with open(outputLoc, "wb") as out_handle:
        csv.writer(out_handle, delimiter=delim).writerows(inputList)
        

def print_top_relations(all_rels, output_file, top_num=-1):
    """
    Write relation frequencies to a file, most frequent first.

    The file starts with a header line, followed by one
    ``<relation> :  <count>`` line per relation.

    Args:
        all_rels (list): List of all relations (hashable values)
        output_file (str): Path to output file
        top_num (int): Number of top relations to write (-1 for all)

    Returns:
        None
    """
    counts = Counter(all_rels)

    if top_num == -1:  # Write all
        header = "Frequent relations:"
        ranked = counts.most_common()
    else:
        # Doubled spaces match what the comma-separated print produced.
        header = "Top  %s  frequent relations:" % (top_num,)
        ranked = counts.most_common(top_num)

    # The context manager closes (and flushes) the file; it used to be left open.
    with open(output_file, 'w') as out:
        out.write(header + "\n")
        for relation, count in ranked:
            out.write("%s :  %s\n" % (relation, count))


def save_pairwise_relations_with_node_selection(df_rels, entity_versions, output_file):
    """
    Write, for each entity version, the frequencies of the relations it occurs in.

    For every entity a banner is written. For each of its versions, the rows
    of ``df_rels`` whose 'arg1' or 'arg2' contains that version are selected
    and their 'rel' values are counted and listed, most frequent first.

    Args:
        df_rels (pandas.DataFrame): DataFrame of relations with string
            columns 'arg1', 'rel' and 'arg2'
        entity_versions (dict): Maps an entity name to the list of its versions
        output_file (str): Path to output file

    Returns:
        None
    """
    rule = "-------------------------"

    # The context manager closes the file; it used to be left open.
    with open(output_file, 'w') as out:
        for entity in entity_versions:
            out.write(rule + "\n")
            out.write("%s %s\n" % ("       ", entity))
            out.write(rule + "\n")

            for ent_one_version in entity_versions[entity]:
                out.write("\n\n****  %s  ****\n" % (ent_one_version,))

                # NOTE: pandas' str.contains treats the version as a regular
                # expression by default, so regex metacharacters in a version
                # are interpreted rather than matched literally.
                mentions_version = np.logical_or(
                    df_rels['arg1'].str.contains(ent_one_version),
                    df_rels['arg2'].str.contains(ent_one_version)
                )
                counts = Counter(df_rels[mentions_version]['rel'].tolist())

                out.write("Frequent relations:\n")
                for relation, count in counts.most_common():
                    out.write("%s :  %s\n" % (relation, count))


def rel_to_stemRel(r):
    """
    Convert a relation to its stemmed version.

    The head word is the text between ``{`` and ``}``. It is stemmed with the
    English Snowball stemmer and mapped to its representative for
    "[DEFAULT_DATASET]". For negated relations (containing the word "not" or
    the substring "cannot") the surrounding text is kept and only the head is
    replaced; otherwise only the braced head is returned. A relation without
    any ``{head}`` is stemmed as a whole and wrapped in braces instead of
    raising.

    Args:
        r (str): Relation string

    Returns:
        str: Stemmed relation string
    """
    stemmer = SnowballStemmer("english")

    def canonical(text):
        # Stem, then map the stem to its representative form.
        return get_relation_representative(stemmer.stem(text), dataset="[DEFAULT_DATASET]")

    # Remove {} inside the << >> to not mistakenly take it as the head noun
    bracketed = re.search(r'\<<(.*)\>>', r)
    if bracketed:
        r = r.split("<<")[0] + bracketed.group(0).replace("{", "").replace("}", "") + r.split(">>")[1]

    head_match = re.search(r'\{(.*)\}', r)
    if head_match is None:
        # No head marked: treat the whole phrase as the head.
        return "{" + canonical(r) + "}"

    # The greedy match may span several brace pairs; drop the inner braces.
    rel_head = canonical(head_match.group(1).replace("{", "").replace("}", ""))

    if is_entity_present(r.replace("{", "").replace("}", ""), "not") or "cannot" in r:
        # Keep the negation context: text before the first "{" and everything
        # after the first "}" (with any remaining "}" removed).
        return r.split("{")[0] + "{" + rel_head + "}" + ''.join(r.split("}")[1:])

    return "{" + rel_head + "}"


def get_top_extractions(df_rels, output_file=None, top_num=-1, save_to_file=False, stem_rels=False, just_head_arg=False):
    """
    Count identical (arg1, rel, arg2) extractions and rank them by frequency.

    Each row is reduced to the key "arg1;rel;arg2" (all parts stripped of
    surrounding whitespace) and the keys are counted.

    Args:
        df_rels (pandas.DataFrame): DataFrame of relations with string
            columns 'arg1', 'rel' and 'arg2'
        output_file (str): Output path prefix; '_rels_aggregated.csv' is
            appended to it
        top_num (int): Number of top extractions to get (-1 for all)
        save_to_file (bool): Whether to save to file
        stem_rels (bool): Whether to stem relations with rel_to_stemRel
        just_head_arg (bool): Whether to reduce each argument to its braced
            head word. An argument without a ``{head}`` is used whole,
            wrapped in braces, instead of raising.

    Returns:
        pandas.DataFrame: Columns "relation tuple" and "counts", most
            frequent first
    """
    def head_only(arg):
        # "{head}" of the argument; the whole argument when no head is marked.
        arg = arg.strip()
        found = re.search(r'\{(.*)\}', arg)
        return "{" + (found.group(1) if found else arg) + "}"

    cnt = Counter()
    for _, row in df_rels.iterrows():
        # Get arg1, and arg2 headwords
        if just_head_arg:
            arg1_simp = head_only(row["arg1"])
            arg2_simp = head_only(row["arg2"])
        else:
            arg1_simp = row["arg1"].strip()
            arg2_simp = row["arg2"].strip()

        # Get stem version of rel
        rel_simp = row["rel"].strip()
        if stem_rels:
            rel_simp = rel_to_stemRel(rel_simp)

        cnt[arg1_simp + ";" + rel_simp + ";" + arg2_simp] += 1

    # most_common(None) returns every entry.
    limit = None if top_num == -1 else top_num
    list_aggregated_rels = [[key, count] for key, count in cnt.most_common(limit)]

    df = pd.DataFrame(list_aggregated_rels, columns=["relation tuple", "counts"])

    if output_file is not None and save_to_file:
        df.to_csv(output_file + '_rels_aggregated.csv', index=False)

    return df
         

def get_top_entities(df_rels, output_file=None, top_num=-1, save_to_file=False, just_head_arg=True):
    """
    Rank the entities appearing as arg1 or arg2 by frequency.

    Each ranked entity is stripped of braces and tagged on its own with
    ``nltk.tag.pos_tag``; entities that are empty after brace removal are
    left out.

    Args:
        df_rels (pandas.DataFrame): DataFrame of relations with string
            columns 'arg1' and 'arg2'
        output_file (str): Path to output file
        top_num (int): Number of top entities to get (-1 for all)
        save_to_file (bool): Whether to save to file
        just_head_arg (bool): Whether to reduce each argument to its braced
            head word (an argument without a ``{head}`` is used whole,
            wrapped in braces)

    Returns:
        pandas.DataFrame: Columns 'entity', 'pos' and 'frequency', most
            frequent first
    """
    def braced_head(arg):
        # "{head}" of the argument; the whole argument when no head is marked.
        arg = arg.strip()
        found = re.search(r'\{(.*)\}', arg)
        return "{" + (found.group(1) if found else arg) + "}"

    if just_head_arg:
        entities = []
        for _, item in df_rels.iterrows():
            entities.append(braced_head(item["arg1"]))
            entities.append(braced_head(item["arg2"]))
    else:
        entities = list(df_rels['arg1']) + list(df_rels['arg2'])

    cols = ['entity', 'pos', 'frequency']
    df_entity_rankings = pd.DataFrame(columns=cols)

    # most_common(None) returns every entry.
    limit = None if top_num == -1 else top_num
    for entity, count in Counter(entities).most_common(limit):
        entity_no_bracket = entity.replace("{", "").replace("}", "")
        if not entity_no_bracket:
            continue
        entity_pos = nltk.tag.pos_tag([entity_no_bracket])
        df_entity_rankings.loc[len(df_entity_rankings)] = [entity, entity_pos[0][1], count]

    if output_file is not None and save_to_file:
        # to_csv opens the path itself; the extra handle that used to be
        # opened here was never written to or closed.
        df_entity_rankings.to_csv(output_file, sep=',', encoding='utf-8', header=True, columns=cols)

    return df_entity_rankings


def get_top_relations(df_rels, output_file=None, top_num=-1, save_to_file=False, stem_rels=True, dataset="[DEFAULT_DATASET]"):
    """
    Rank the (stemmed, canonicalised) relations of a DataFrame by frequency.

    For every row the head of 'rel' (the text between ``{`` and ``}``) is
    stemmed with the English Snowball stemmer and mapped to its
    representative for ``dataset``. Negated relations (containing the word
    "not" or the substring "cannot") keep their surrounding text; other
    relations are reduced to the braced head. A relation with no marked head
    is stemmed as a whole and counted too (it used to be computed and then
    dropped).

    Args:
        df_rels (pandas.DataFrame): DataFrame of relations with a string
            column 'rel'
        output_file (str): Path to output file
        top_num (int): Number of top relations to get (-1 for all)
        save_to_file (bool): Whether to save to file
        stem_rels (bool): Not used by the body; relations are always stemmed
        dataset (str): Dataset name

    Returns:
        pandas.DataFrame: Columns 'relation' and 'frequency', most frequent
            first
    """
    stemmer = SnowballStemmer("english")

    def canonical(text):
        # Stem, then map the stem to its representative form.
        return get_relation_representative(stemmer.stem(text), dataset)

    relations = []

    for _, item in df_rels.iterrows():
        r = item["rel"]

        # If there is no head noun specified in the relation phrase, take the
        # whole phrase as the main content
        if "{" not in r:
            relations.append("{" + canonical(r) + "}")
            continue

        # Remove the {} inside the << >> to not mistakenly take it as the head noun
        bracketed = re.search(r'\<<(.*)\>>', r)
        if bracketed:
            r = r.split("<<")[0] + bracketed.group(0).replace("{", "").replace("}", "") + r.split(">>")[1]

        head_match = re.search(r'\{(.*)\}', r)
        if head_match is None:
            # The only braces were inside << >>: no head is left to extract.
            relations.append("{" + canonical(r) + "}")
            continue

        # The greedy match may span several brace pairs; drop the inner braces.
        rel_head = canonical(head_match.group(1).replace("{", "").replace("}", ""))

        if is_entity_present(r.replace("{", "").replace("}", ""), "not") or "cannot" in r:
            # Keep the negation context around the replaced head.
            r_new = r.split("{")[0] + "{" + rel_head + "}" + ''.join(r.split("}")[1:])
        else:
            r_new = "{" + rel_head + "}"

        relations.append(r_new)

    cols = ['relation', 'frequency']
    df_relation_rankings = pd.DataFrame(columns=cols)

    # most_common(None) returns every entry.
    limit = None if top_num == -1 else top_num
    for relation, count in Counter(relations).most_common(limit):
        df_relation_rankings.loc[len(df_relation_rankings)] = [relation, count]

    if output_file is not None and save_to_file:
        # to_csv opens the path itself; the extra handle that used to be
        # opened here was never written to or closed.
        df_relation_rankings.to_csv(output_file, sep=',', encoding='utf-8', header=True, columns=cols)

    return df_relation_rankings


def get_relation_representative(r, dataset="[DEFAULT_DATASET]"):
    """
    Map a relation to its representative form, if it has one.

    The reverse mapping for ``dataset`` is looked up with ``r``; a non-empty
    entry is returned, otherwise ``r`` itself comes back unchanged.

    Args:
        r (str): Relation string
        dataset (str): Dataset name

    Returns:
        str: Representative form of the relation
    """
    reverse_mapping = get_relation_versions_reverse_mapping(dataset)
    representative = reverse_mapping[r]
    return representative if len(representative) > 0 else r


def print_full(df):
    """
    Print full DataFrame without truncation.

    'display.max_rows' is raised to the number of rows only while printing.
    The caller's previous setting is restored afterwards, even if printing
    raises, rather than being reset to the pandas default.

    Args:
        df (pandas.DataFrame): DataFrame to print

    Returns:
        None
    """
    with pd.option_context('display.max_rows', len(df)):
        print(df)


def error_msg(error_type):
    """
    Look up the message text for an error type.

    Args:
        error_type (str): Type of error

    Returns:
        str: Error message, or None when the type is not recognised
    """
    known_messages = (
        ("tokenizer", "Tokenizer failed during parsing, Ex. there might be a dash in the sentence!"),
    )

    for kind, message in known_messages:
        if error_type == kind:
            return message

    return None
    

def get_relation_versions_reverse_mapping(dataset="[DEFAULT_DATASET]"):
    """
    Get reverse mapping of relation versions.

    Args:
        dataset (str): Dataset name

    Returns:
        collections.defaultdict: Maps every relation version to its canonical
            relation name. Because the default factory is ``list``, looking
            up an unknown version yields an empty list, which callers use as
            the "no mapping" signal.
    """
    relation_versions_reverse_mapping = defaultdict(list)

    # .items() is available on both Python 2 and 3.
    for rel_glob_name, rel_version_list in get_relation_versions(dataset).items():
        for rel_version in rel_version_list:
            relation_versions_reverse_mapping[rel_version] = rel_glob_name

    return relation_versions_reverse_mapping
    

def get_relation_versions(dataset="[DEFAULT_DATASET]"):
    """
    Get different versions of relations for a dataset.

    Args:
        dataset (str): Dataset name

    Returns:
        collections.defaultdict: Maps each canonical relation to the list of
            its versions, all lowercased; empty for a dataset with no
            relations defined
    """
    relation_versions = defaultdict(list)

    if dataset == "[DEFAULT_DATASET]":
        relation_versions['[RELATION_1]'] = ['[VERSION_1A]', '[VERSION_1B]', '[VERSION_1C]', '[VERSION_1D]']
        relation_versions['[RELATION_2]'] = ['[VERSION_2A]', '[VERSION_2B]']
        relation_versions['[RELATION_3]'] = ['[VERSION_3A]', '[VERSION_3B]']
        relation_versions['[RELATION_4]'] = ['[VERSION_4A]', '[VERSION_4B]', '[VERSION_4C]', '[VERSION_4D]']

    # Make everything lowercase. Slice assignment rewrites each list in place,
    # so the mapping itself is not modified while it is being iterated.
    for rel_version_list in relation_versions.values():
        rel_version_list[:] = [version.lower() for version in rel_version_list]

    return relation_versions
    

def get_entity_versions(dataset="[DEFAULT_DATASET]"):
    """
    Get different versions of entities for a dataset.

    Args:
        dataset (str): Dataset name

    Returns:
        collections.defaultdict: Maps each canonical entity to the list of
            its versions, all lowercased; empty for a dataset with no
            entities defined
    """
    entity_versions = defaultdict(list)

    if dataset == "[DEFAULT_DATASET]":
        entity_versions['[ENTITY_1]'] = ['[VERSION_1A]', '[VERSION_1B]', '[VERSION_1C]', '[VERSION_1D]', '[VERSION_1E]']
        entity_versions['[ENTITY_2]'] = ['[VERSION_2A]', '[VERSION_2B]', '[VERSION_2C]', '[VERSION_2D]',
                                       '[VERSION_2E]', '[VERSION_2F]', '[VERSION_2G]',
                                       '[VERSION_2H]', '[VERSION_2I]', '[VERSION_2J]']
        # Additional entities would be defined similarly

    # Make everything lowercase. Slice assignment rewrites each list in place,
    # so the mapping itself is not modified while it is being iterated.
    for ent_version_list in entity_versions.values():
        ent_version_list[:] = [version.lower() for version in ent_version_list]

    return entity_versions
        
    
def change_nt_to_not(sent):
    """
    Change n't contractions to 'not' in a sentence.

    "can't" and "won't" are expanded to "cannot" and "will not" wherever
    they appear as whole words, including at the start or end of the
    sentence and before punctuation, keeping the case of the first letter.
    Previously they were only handled between two spaces and otherwise came
    out as "ca not" / "wo not". Every other "n't" that ends a word is
    replaced by " not" (e.g. "don't" -> "do not", "isn't." -> "is not.").

    Args:
        sent (str): Input sentence

    Returns:
        str: Sentence with n't changed to 'not'
    """
    # Irregular stems first, otherwise the generic rule below would split them.
    sent = re.sub(r"\b([Cc])an't\b", r"\1annot", sent)
    sent = re.sub(r"\b([Ww])on't\b", r"\1ill not", sent)

    # Generic rule: n't at the end of a word, i.e. not followed by a letter
    # or digit (covers a following space, punctuation and end of sentence).
    return re.sub(r"n't(?![A-Za-z0-9])", " not", sent)


def change_multi_dots_to_single_dot(sent):
    """
    Collapse every run of dots into one dot.

    A single space is inserted after the dot when the character following
    the run is not already a space; nothing is added when the run ends the
    sentence. A lone dot counts as a run, so "a.b" becomes "a. b".

    Args:
        sent (str): Input sentence

    Returns:
        str: Sentence with each run of dots replaced by one dot
    """
    # Splitting on "." turns a run of k dots into k-1 empty segments.
    segments = sent.split(".")
    last = len(segments) - 1

    rebuilt = [segments[0]]
    for position, segment in enumerate(segments[1:], 1):
        if segment:
            # The run before this segment is over: emit its single dot,
            # plus a space unless the segment already starts with one.
            rebuilt.append("." if segment[0] == " " else ". ")
            rebuilt.append(segment)
        elif position == last:
            # The sentence ends with a run of dots.
            rebuilt.append(".")
        # Any other empty segment lies inside a run and produces nothing.

    return "".join(rebuilt)
            

def strip_non_ascii(sent):
    """
    Keep only the characters whose code point lies strictly between 0 and 127.

    Args:
        sent (str): Input string

    Returns:
        str: The kept characters, in their original order
    """
    kept = []
    for ch in sent:
        code = ord(ch)
        if code <= 0 or code >= 127:
            continue
        kept.append(ch)
    return ''.join(kept)


def clean_sent(sent):
    """
    Normalise a sentence before further processing.

    Steps, in order:
    1. Strip non-ASCII characters
    2. Drop punctuation other than ".", ",", ";", "!", "?", "'"
    3. Expand n't to not
    4. Collapse runs of dots into a single dot

    Args:
        sent (str): Input sentence

    Returns:
        str: Cleaned sentence
    """
    kept_punctuation = ".,;!?'"
    dropped = set(ch for ch in string.punctuation if ch not in kept_punctuation)

    ascii_only = strip_non_ascii(sent)
    without_punctuation = ''.join([ch for ch in ascii_only if ch not in dropped])

    return change_multi_dots_to_single_dot(change_nt_to_not(without_punctuation))


def is_entity_present(sent, entity):
    """
    Check if an entity is present in a sentence as a whole word or phrase.

    Punctuation other than the apostrophe is removed from the sentence and
    both strings are lowercased. The entity then has to be delimited by
    spaces or by the ends of the sentence; a match inside a longer word
    does not count.

    Args:
        sent (str): Sentence to check
        entity (str): Entity to look for

    Returns:
        bool: True if the entity is present, False otherwise
    """
    # Remove punctuation except apostrophes. Filtering character by character
    # works for byte strings and unicode alike, unlike str.translate(None, ...).
    removable = set(string.punctuation.replace("'", ""))
    sent = ''.join(ch for ch in sent if ch not in removable).lower()
    entity = entity.lower()

    # Padding both sides with a space folds the four cases (whole sentence,
    # start, middle, end) into one containment test.
    return (" " + entity + " ") in (" " + sent + " ")


def aggregate_who_are_entities(df_rels, output_file=None, top_num=-1, save_to_file=False, stem_rels=False, just_head_arg=False):
    """
    Rank the extractions whose relation is exactly "is" or "{is}".

    The matching rows are handed to get_top_extractions together with all
    remaining arguments.

    Args:
        df_rels (pandas.DataFrame): DataFrame of relations
        output_file (str): Path to output file
        top_num (int): Number of top entities to get (-1 for all)
        save_to_file (bool): Whether to save to file
        stem_rels (bool): Whether to stem relations
        just_head_arg (bool): Whether to use just head arguments

    Returns:
        pandas.DataFrame: DataFrame of aggregated entities
    """
    rel_column = df_rels["rel"]
    is_mask = np.logical_or(rel_column == "is", rel_column == "{is}")

    return get_top_extractions(
        df_rels[is_mask], output_file, top_num, save_to_file, stem_rels, just_head_arg
    )


def create_equivalent_dict(df_is_rels, main_ent_name, ent_version_list):
    """
    Create a dictionary of equivalent entities.

    For every row in which one argument contains ``{version}`` for some
    entity version, the other argument is taken as an equivalent
    (descriptive) phrase. Equivalents are grouped by their head word, i.e.
    the text between ``{`` and ``}``. An equivalent without a marked head is
    grouped under its whole stripped text instead of raising.

    Args:
        df_is_rels (pandas.DataFrame): DataFrame of 'is' relations with
            string columns 'arg1' and 'arg2'
        main_ent_name (str): Main entity name (not used by the body)
        ent_version_list (list): List of entity versions

    Returns:
        dict: Maps each head word to ``{"count": int, "versions": dict}``,
            where "versions" maps each full equivalent phrase to the number
            of times it occurred
    """
    dict_main_ent_equiv = {}

    for _, item in df_is_rels.iterrows():
        equiv_ent = ""
        # Check if any of the entity versions are present in any of the arguments
        # Then take the other argument as their equivalent word (descriptive word)
        for ent_v in ent_version_list:
            marker = "{" + ent_v + "}"
            if marker in item["arg1"]:
                equiv_ent = item["arg2"]
                break
            if marker in item["arg2"]:
                equiv_ent = item["arg1"]
                break

        # If this entity is not the head word of any of the arguments (or the
        # other argument is empty), skip this row
        if not equiv_ent:
            continue

        head_match = re.search(r'\{(.*)\}', equiv_ent)
        if head_match:
            # The greedy match may span several brace pairs; drop inner braces.
            equiv_ent_head = head_match.group(1).replace("{", "").replace("}", "")
        else:
            equiv_ent_head = equiv_ent.strip()

        entry = dict_main_ent_equiv.setdefault(equiv_ent_head, {"count": 0, "versions": {}})
        entry["count"] += 1
        entry["versions"][equiv_ent] = entry["versions"].get(equiv_ent, 0) + 1

    return dict_main_ent_equiv
        
    
def save_entity_sorted_equivalents(dict_main_ent_equiv, main_ent_name, f):
    """
    Write the equivalents of an entity to an open file, most frequent first.

    A starred banner carrying the entity name comes first. Each head word
    follows with its total count, then its versions as (phrase, count)
    tuples sorted by descending count.

    Args:
        dict_main_ent_equiv (dict): Maps a head word to a dict with keys
            "count" and "versions"
        main_ent_name (str): Main entity name
        f (file): Output file, already open for writing

    Returns:
        None
    """
    heavy_rule = "*" * 60
    side_stars = "*" * ((59 - len(main_ent_name)) // 2)

    print >>f, heavy_rule
    print >>f, side_stars, main_ent_name, side_stars
    print >>f, heavy_rule
    print >>f, ""

    heads_by_count = sorted(
        dict_main_ent_equiv.iteritems(),
        key=lambda pair: pair[1]['count'],
        reverse=True
    )
    for head, info in heads_by_count:
        print >>f, head, "->", info["count"]
        print >>f, "-" * 60

        versions_by_count = sorted(
            info["versions"].iteritems(),
            key=lambda pair: pair[1],
            reverse=True
        )
        for version_and_count in versions_by_count:
            # Printed as a tuple, e.g. ('phrase', 3).
            print >>f, version_and_count
        print >>f, ""
        

def write_df_to_csv(path_with_file_name, df_input, header=None):
    """
    Dump a DataFrame to a comma-separated file, header row first.

    Args:
        path_with_file_name (str): Path to output file (opened in 'wb' mode)
        df_input (pandas.DataFrame): DataFrame to write
        header (list): Column names for the first row; the DataFrame's own
            columns are used when omitted

    Returns:
        None
    """
    column_names = df_input.columns if header is None else header

    with open(path_with_file_name, 'wb') as out_handle:
        sink = csv.writer(out_handle, delimiter=',')
        sink.writerow(column_names)
        # One output row per DataFrame row; the index is not written.
        sink.writerows(row for _, row in df_input.iterrows())
